pacman::p_load(corrplot, tidyverse)

# Load the survey responses: cell range B50:AJ431 of the "survey" sheet.
data <- readxl::read_excel(
  path = "DataSets.xlsx",
  sheet = "survey",
  range = "B50:AJ431"
)

# Cross-tabulate hroof against ccheard and print the contingency table.
ct <- table(data$hroof, data$ccheard)
ct

# Chi-squared test of association on that table.
ctest <- chisq.test(ct)
ctest
# Recorded result: p > 0.05, so H0 (no association between hroof and ccheard)
# could not be rejected.

# Fisher's exact test on the same table.
ftest <- fisher.test(ct)
ftest

# Chi-squared test of association between rain and cyclone.
ct <- table(data$rain, data$cyclone)
ctest <- chisq.test(ct)
ctest

# Print the individual components of the test result.
ctest$observed   # observed counts
ctest$expected   # expected counts
ctest$statistic  # chi-squared statistic
ctest$p.value    # p-value of the test

# Percentage contribution of each cell to the chi-squared statistic:
#   contribution = residual^2 / statistic * 100
# using the residuals and statistic reported by chisq.test() in `ctest`.
resid <- ctest$residuals
statistic <- ctest$statistic
contrib <- (resid^2 / statistic) * 100
contrib

# Visualise the contributions. is.corr = FALSE because `contrib` is a general
# matrix, not a correlation matrix.
corrplot(contrib, is.corr = FALSE, cl.pos = "b")

# Cramer's V: strength of association for the contingency table `ct`.
#   V = sqrt(chi^2 / (n * (k - 1)))
# where n is the total count in the table and k is the smaller of its two
# dimensions.
n <- sum(ct)
k <- min(dim(ct))

# V is defined on the uncorrected Pearson chi-squared statistic. chisq.test()
# applies Yates' continuity correction to 2 x 2 tables by default, so the
# statistic is recomputed here with correct = FALSE (this makes no difference
# for larger tables). unname() drops the "X-squared" label so that `cramer`
# prints as a plain number.
chi <- unname(chisq.test(ct, correct = FALSE)$statistic)

cramer <- sqrt(chi / (n * (k - 1)))
cramer

# Previously recorded: Cramer's V = 0.16 (weak association).
# NOTE(review): if `ct` is a 2 x 2 table, that value may have been computed
# from the Yates-corrected statistic; re-check it against the output above.

# Association between educational qualifications and sport preferences

# Read cell range G18:I298 of the "chi_square" sheet.
sport <- readxl::read_excel(
  path = "DataSets.xlsx",
  sheet = "chi_square",
  range = "G18:I298"
)

# Quick inspection: first rows, last rows, and column structure.
head(sport)
tail(sport)
str(sport)
# View(sport)

# Chi-squared test of association between education and sport.
ct <- table(sport$education, sport$sport)
ctest <- chisq.test(ct)
ctest

# Percentage contribution of each cell to the chi-squared statistic
# (residual^2 / statistic * 100).
contrib <- ((ctest$residuals)^2 / ctest$statistic) * 100

# Show the contributions on the active graphics device.
corrplot(contrib, is.corr = FALSE, cl.pos = "b")


# Save the contribution plot to a 6 x 6 inch PNG file at res = 1000.
png(
  "fig 2. Contribution plot.png",
  height = 6, width = 6, units = "in", res = 1000
)

# Close the PNG device even if corrplot() fails; otherwise the device would
# stay active and later plots would silently be drawn into this file.
# invisible() keeps the value returned by corrplot() from being printed.
invisible(tryCatch(
  corrplot(contrib, is.corr = FALSE, cl.pos = "b"),
  finally = dev.off()
))
